* Create mover data

	use "${clean_data}/panel_physicians_clean.dta", clear
	
	* Restrict to age 40-55
	loc agerange = "40,55" 
	
	* Restrict the data to firms with at least 15 NPIs 
	loc firmsizemax = 15

	drop if einsize < `firmsizemax'
	gegen ein_adj=group(ein)
	
	* ein_adj and count firm moves in age-range 40-55
	xtset npi year
	
	gen moved2 = ein_adj != L.ein_adj if ein_adj != . & L.ein_adj !=. & inrange(age, `agerange') // Accounting for age-range
	gegen moves2 = sum(moved2), by(id)
	
	* Creates flags for never movers and single movers 
	* (omitted categories - do not observe consecutive years, or observe
	* more than one move)
	gen mover_type = "Never Movers" if moves2 == 0
	replace mover_type = "Single Movers" if moves2 == 1
	keep if inlist(mover_type, "Never Movers", "Single Movers")
	drop moves2
	
	* Creating relative year variable
	gen temp = year if  moved2 == 1
	gegen moveyear = max(temp), by(id)
	drop temp

	gen relyear = . 
	replace relyear = 0 if mover_type == "Never Movers"	
	replace relyear = year - moveyear if mover_type == "Single Movers" 
	replace relyear=relyear+16 // +16 to avoid negative factor variables	

	gsort id year
	assert !missing(mover_type)
	gisid id year
	gisid id relyear if mover_type == "Single Movers"
	assert relyear==16 if mover_type == "Never Movers"
	
	* Save mover data
	save "${intermediate_data}/twfe/physicians_movers_EINsize`firmsizemax'.dta", replace
	
	keep if inrange(age, `agerange') 
	keep id npi year cz90 ein_adj age spec_category_code ///
	spec_category_desc logptotinc relyear moveyear mover_type

* Regressions 

	gegen EINlogptotinc = mean(logptotinc), by(ein_adj)

	xtset id year 
	
	gen temp = EINlogptotinc - L.EINlogptotinc
	egen incdiff_pos = max(temp), by(id)
	egen incdiff_neg = min(temp), by(id)
	gen incdiff=incdiff_pos if incdiff_pos!=0
	replace incdiff=incdiff_neg if incdiff_neg!=0
	drop temp incdiff_pos incdiff_neg
	

		keep if mover_type == "Single Movers"
		reghdfe logptotinc if !missing(npi), a(ein_adj age id relyear, savefe) resid
		local N_UR = e(N)
		
		keep if e(sample)
		
		keep logptotinc cz90 ein_adj spec_category_code spec_category_desc id year __hdfe* _reghdfe_resid
		rename (__hdfe1__ __hdfe2__ __hdfe3__ __hdfe4__)  (__hdfeFIRM__ __hdfeAGE__ __hdfeINDIVID__ __hdfeRELYEAR__)
		gen N_UR = `N_UR'
		save "$output/twfe/firm_twfe/IndividFixedEffects_physicians_EINsize`firmsizemax'.dta", replace

		gcollapse logptotinc __hdfe*__ N_UR, by(ein_adj)
		save "$output/twfe/firm_twfe/FirmFixedEffects_physicians_EINsize`firmsizemax'.dta", replace


********* Process results of Two-Way Fixed Effects Regressions 
	
	use "$output/twfe/firm_twfe/IndividFixedEffects_physicians_EINsize`firmsizemax'.dta", clear
	rename  (__hdfeFIRM__ __hdfeAGE__ __hdfeINDIVID__ __hdfeRELYEAR__) (__hdfe1__ __hdfe2__ __hdfe3__ __hdfe4__) 
	local N_UR = `=N_UR[1]'
		preserve
		
			foreach y in 1 2 3 4 {
				qui sum __hdfe`y'__
				gen sd_hdfe`y' = `r(sd)'
				gen CoVar_hdfe`y' = `r(Var)'

				foreach z in 1 2 3 4 {
					if `y' < `z' {
						qui corr __hdfe`y'__ __hdfe`z'__
						gen  Corr_hdfe`y'`z' = r(C)[1,2]

						qui corr __hdfe`y'__ __hdfe`z'__, cov
						gen CoVar_hdfe`y'`z' = `r(cov_12)'
					}
				}
			}

			* Variance of the residual
			qui sum _reghdfe_resid 
			gen CoVar_resid = `r(Var)'

			gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

			gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

			gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + CoVar_hdfe3 + CoVar_hdfe4 ///
				+ 2*CoVar_hdfe12 + 2*CoVar_hdfe13 + 2*CoVar_hdfe14 + 2*CoVar_hdfe23 + 2*CoVar_hdfe24 + 2*CoVar_hdfe34 + CoVar_resid

			gen sd_recomposed = sqrt(CoVar_recomposed)

			gen spec = "Person-Year Level"

			save "${temp}/personyear_physicians_EINsize`firmsizemax'.dta", replace

		restore
		
		gcollapse logptotinc __hdfe* _reghdfe_resid, by(ein_adj)

		foreach y in 1 2 3 4 {
			qui sum __hdfe`y'__, d
			gen sd_hdfe`y' = `r(sd)'
			gen CoVar_hdfe`y' = `r(Var)'

			foreach z in 1 2 3 4 {
				if `y' < `z' {
					qui corr __hdfe`y'__ __hdfe`z'__
					gen  Corr_hdfe`y'`z' = r(C)[1,2]

					qui corr __hdfe`y'__ __hdfe`z'__, cov
					gen CoVar_hdfe`y'`z' = `r(cov_12)'
				}
			}
		}

		* Variance of the residual
		qui sum _reghdfe_resid 
		gen CoVar_resid = `r(Var)'

		gcollapse (sd) sd_logptotinc = logptotinc (first) sd_* Corr_* CoVar_*

		gen CoVar_logptotinc = sd_logptotinc * sd_logptotinc

		gen CoVar_recomposed =	CoVar_hdfe1 + CoVar_hdfe2 + CoVar_hdfe3 + CoVar_hdfe4 ///
			+ 2*CoVar_hdfe12 + 2*CoVar_hdfe13 + 2*CoVar_hdfe14 + 2*CoVar_hdfe23 + 2*CoVar_hdfe24 + 2*CoVar_hdfe34 + CoVar_resid

		gen sd_recomposed = sqrt(CoVar_recomposed)

		gen spec = "Firm Level"
		
		append using "${temp}/personyear_physicians_EINsize`firmsizemax'.dta"
		
		reshape long sd_ CoVar_ Corr_, i(spec) j(effect) string
		
		replace effect = subinstr(effect, "12", "Firm/Age", .)
		replace effect = subinstr(effect, "13", "Firm/Individ", .)
		replace effect = subinstr(effect, "14", "Firm/RelYear", .)
		replace effect = subinstr(effect, "23", "Age/Individ", .)
		replace effect = subinstr(effect, "24", "Age/RelYear", .)
		replace effect = subinstr(effect, "34", "Individ/RelYear", .)

		replace effect = subinstr(effect, "1", "Firm", .)
		replace effect = subinstr(effect, "2", "Age", .)
		replace effect = subinstr(effect, "3", "Individ", .)
		replace effect = subinstr(effect, "4", "RelYear", .)
		replace effect = subinstr(effect, "hdfe", "", .)
		
		gen id = . 
		replace id = 1 if effect == "logptotinc"
		replace id = 2 if effect == "recomposed"
		replace id = 3 if effect == "Firm"
		replace id = 4 if effect == "Age"
		replace id = 5 if effect == "Individ"
		replace id = 6 if effect == "RelYear"
		replace id = 7 if effect == "Firm/Age"
		replace id = 8 if effect == "Firm/Individ"
		replace id = 9 if effect == "Firm/RelYear"
		replace id = 10 if effect == "Age/Individ"
		replace id = 11 if effect == "Age/RelYear"
		replace id = 12 if effect == "Individ/RelYear"

		bys spec: gen temp = CoVar_ if effect == "recomposed"
		egen temp2 = max(temp), by(spec)
		by spec: gen Var_share = CoVar_ / temp2
		
		gen N_UR = `N_UR'
		
		order spec, first 
		gsort -spec id
		drop id temp temp2

		save "$output/twfe/firm_twfe/earnings_decomp_card_controls_physicians_EINsize`firmsizemax'", replace

	* Co-location of "productive" firms and people?

	loc firmsizemax = 15
	use "$output/twfe/firm_twfe/IndividFixedEffects_physicians_EINsize`firmsizemax'.dta", clear
	keep id cz90 ein_adj __hdfeFIRM__ __hdfeINDIVID__
	duplicates drop
	ren (__hdfeINDIVID__ __hdfeFIRM__) (individfe firmfe)
	
	binscatter individfe firmfe, genxq(firmfebin_residyear) ytitle("Individual FE", height(10)) xtitle("Firm FE") reportreg note("Based on AKM model with person and EIN FEs.")
		reg individfe firmfe, robust
		gen residyear_slope = e(b)[1,1]
		gen residyear_cons = e(b)[1,2]

	binscatter individfe firmfe, genxq(firmfebin_residczyear) absorb(cz90) ytitle("Individual FE", height(10)) xtitle("Firm FE") reportreg note("Based on AKM model with person and EIN FEs. Conditional on CZ f.e.")
		reghdfe individfe firmfe, absorb(cz90)
		gen residczyear_slope = e(b)[1,1]
		gen residczyear_cons = e(b)[1,2]
	
	* Export graphs
	su individfe
	loc mean_indiv_fe = `r(mean)'
	su firmfe
	loc mean_firm_fe = `r(mean)'
		
		// Condition on year FEs
		reghdfe individfe , noabsorb resid
			local N_indivresidyear = e(N)
			g residyear_indivfe = _reghdfe_resid + `mean_indiv_fe'	
		reghdfe firmfe , noabsorb resid
			local N_firmresidyear = e(N)
			g residyear_firmfe = _reghdfe_resid + `mean_firm_fe'
			
			assert `N_indivresidyear' == `N_firmresidyear'
		
		// Condition on year and cz FEs
		reghdfe individfe, absorb(cz90) resid
			local N_indivresidczyear = e(N)
			g residczyear_indivfe = _reghdfe_resid + `mean_indiv_fe'
		reghdfe firmfe, absorb(cz90) resid
			local N_firmresidczyear = e(N)
			g residczyear_firmfe = _reghdfe_resid + `mean_firm_fe'
			
			assert `N_indivresidczyear' == `N_firmresidczyear'
			
		foreach resid in residyear residczyear {
			preserve
				gcollapse (mean) `resid'* (count) N_UR_ein_`resid' = ein_adj, by(firmfebin_`resid')
				keep *`resid'*		
				ren firmfebin_`resid' firmfebin
				g N_UR_`resid' = `N_firm`resid''
				drbcount N_UR_ein_`resid', gen(N_ein_`resid')			
				tempfile temp`resid'
				save `temp`resid''
			restore
		}
		
		use `tempresidyear', clear
		merge 1:1 firmfebin using `tempresidczyear', nogen assert(3)
		ds *slope *cons
		drbest_docinc residczyear_firmfe residczyear_indivfe residyear_firmfe residyear_indivfe `r(varlist)'
		order N_UR*, last
		export delimited using "${mypath}/intermediate_csv/firmtwfe01-Individual_vs_firm_FE.csv", replace dataf delim(tab)  			
